#!/usr/bin/env bash
#
# Launch the vLLM throughput benchmark on a MUSA device with a 4-way
# tensor-parallel Llama-2-70B (fp16) checkpoint, feeding prompts from the
# ShareGPT dataset.
#
# Usage: run from the repository root (PYTHONPATH is set to ".").
set -euo pipefail

# Make the local repo importable by the benchmark script.
export PYTHONPATH=.

# Paths to the converted model checkpoint and the prompt dataset.
readonly MODEL_DIR=/data/mtt/model_convert/llama-2-70b-chat-hf-fp16-convert-tp4-new/
readonly DATASET=/data/mtt/dataset/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json

# Alternative synthetic-workload settings (instead of --dataset):
#   --input-len=256 --output-len=1024 --num-prompts=32

python ./benchmarks/benchmark_throughput.py \
  --model="${MODEL_DIR}" \
  --gpu-memory-utilization=0.8 \
  --num-prompts=100 \
  --max-num-batched-tokens=8192 \
  --dataset="${DATASET}" \
  --device="musa" \
  --tensor-parallel-size=4